In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
In [2]:
df = pd.read_csv('log.csv', parse_dates=['Joined', 'Left', 'Time'])
df['Time'] = df['Left'] - df['Joined']
df = df[df['Member Name'] != 'Sauron']
df
Out[2]:
Member Id Member Name Joined Left Time
0 210743674525450240 stratos1805 2021-04-27 02:21:53.125370 2021-04-27 02:33:41.455374 0 days 00:11:48.330004
1 132415133711466496 סדאם חוסיין ז"ל 2021-04-27 02:21:53.125370 2021-04-27 02:35:55.345365 0 days 00:14:02.219995
2 192642893159202816 MrStormagedon 2021-04-27 02:21:53.125370 2021-04-27 03:10:15.058915 0 days 00:48:21.933545
3 193006567455457280 Cardi Biton 2021-04-27 12:44:58.546623 2021-04-27 12:56:30.695541 0 days 00:11:32.148918
4 192986627998613504 rone 2021-04-27 13:55:12.166887 2021-04-27 14:14:01.761058 0 days 00:18:49.594171
... ... ... ... ... ...
345 192660675733094401 יניבצ'וק 2021-05-12 21:04:12.257532 2021-05-12 23:13:01.931482 0 days 02:08:49.673950
346 132415133711466496 סדאם חוסיין ז"ל 2021-05-12 23:18:00.797346 2021-05-12 23:19:11.275895 0 days 00:01:10.478549
347 193006567455457280 Cardi Biton 2021-05-12 22:38:23.822513 2021-05-12 23:35:31.647488 0 days 00:57:07.824975
348 193006567455457280 Cardi Biton 2021-05-13 00:02:22.450375 2021-05-13 00:39:49.312508 0 days 00:37:26.862133
349 193006567455457280 Cardi Biton 2021-05-13 00:40:55.792746 2021-05-13 00:41:59.237103 0 days 00:01:03.444357

350 rows × 5 columns

In [19]:
min_datetime = df['Joined'].min()
max_datetime = df['Left'].max()
min_date, max_date = min_datetime.date(), max_datetime.date()
unique_members = df['Member Name'].unique().tolist()
colormap = plt.cm.tab20(np.linspace(0, 1, len(unique_members)))

print(f'Data collected over {max_datetime - min_datetime} with {len(unique_members)} unique members')
Data collected over 15 days 22:20:10.802191 with 18 unique members
In [42]:
sessions = df.groupby('Member Name')['Time'].sum().dt.seconds / 3600
sessions.index = pd.CategoricalIndex(sessions.index, unique_members)
sessions = sessions.sort_values(ascending=False)
sessions.plot(use_index=True, kind='bar', figsize=(20, 5), title='Average Session Length', xlabel='', ylabel='Hours', rot=0, color=colormap)
plt.show()
2021-05-16T22:40:12.941617 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [4]:
start = min_datetime.replace(minute=0, second=0, microsecond=0)
end = max_datetime.replace(minute=0, second=0, microsecond=0) + pd.offsets.Hour(1)
attendance_data = []
date = start
while date < end:
    members = df[(df['Joined'] <= date + pd.offsets.Minute(1)) & (df['Left'] >= date)]['Member Name'].to_list()
    attendance_data.append([date] + [member in members for member in unique_members])
    date += pd.offsets.Minute(1)
att_df = pd.DataFrame(attendance_data, columns=['Date'] + unique_members)
In [5]:
axes = att_df.set_index('Date').rolling(1).mean().plot(figsize=(14, 24), yticks=[0, 1], subplots=True, sharex=True, kind='area')
for ax in axes:
    ax.set_yticklabels(['Disconnected', 'Connected'])
plt.subplots_adjust()
plt.show()
2021-05-16T21:38:13.738125 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [43]:
def part_of_day(date):
    if 0 <= date.hour < 6:
        return '00:00 - 05:59'
    elif 6 <= date.hour < 12:
        return '06:00 - 11:59'
    elif 12 <= date.hour < 18:
        return '12:00 - 17:59'
    else:
        return '18:00 - 23:59'

weekday_order = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']

daily_att = att_df.groupby(att_df['Date'].dt.day_name()).sum() / 60
daily_att.index = pd.CategoricalIndex(daily_att.index, weekday_order)
daily_att = daily_att.sort_index()
hourly_att = att_df.groupby(att_df['Date'].apply(part_of_day)).sum() / 60
all_att = att_df.sum() / 60

daily_att.plot(use_index=True, kind='bar', figsize=(20, 5), title='Daily Attendance', xlabel='Weekday', rot=0, color=colormap)
plt.show()
hourly_att.plot(use_index=True, kind='bar', figsize=(20, 5), title='Hourly Attendance', xlabel='Time', rot=0, color=colormap)
plt.show()
all_att.sort_values(ascending=False).plot(use_index=True, kind='bar', figsize=(20, 5), title='Total Attendance', xlabel=None, ylabel='Hours', rot=0, color=colormap)
plt.show()
2021-05-16T22:41:09.956941 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
2021-05-16T22:41:10.501500 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
2021-05-16T22:41:10.843585 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [7]:
corr = att_df.corr()
corr
Out[7]:
stratos1805 סדאם חוסיין ז"ל MrStormagedon Cardi Biton rone יניבצ'וק October Detka XPEZNAZ NightSeeker Groovy Rozi xWarLord_Sharon פליציק OMGitsyuvalT Katya dantzi
stratos1805 1.000000 0.573854 0.306401 0.320414 0.108825 0.260161 0.359134 0.083490 0.336118 0.325899 0.163929 0.136142 -0.008702 0.087041 0.132129 -0.017479 0.054608
סדאם חוסיין ז"ל 0.573854 1.000000 0.400392 0.359864 0.157746 0.270571 0.345845 0.173942 0.482206 0.299051 0.256762 0.071171 0.022812 0.061495 0.167033 -0.025481 0.037459
MrStormagedon 0.306401 0.400392 1.000000 0.221086 0.069042 0.223963 0.308753 0.293573 0.225351 0.206389 0.179688 0.087392 -0.031332 0.055703 0.026169 0.005760 -0.015362
Cardi Biton 0.320414 0.359864 0.221086 1.000000 0.134003 0.261088 0.329396 0.022397 0.241395 0.172527 0.036961 -0.019066 -0.015748 -0.017900 0.148718 -0.013625 -0.010476
rone 0.108825 0.157746 0.069042 0.134003 1.000000 0.278954 0.175256 -0.031094 0.243413 0.063362 0.156747 -0.017935 0.378900 -0.007931 0.086769 -0.012751 -0.006980
יניבצ'וק 0.260161 0.270571 0.223963 0.261088 0.278954 1.000000 0.639065 -0.061103 0.168574 0.159559 0.083888 -0.028465 0.085319 0.071676 0.150203 0.071664 0.041948
October 0.359134 0.345845 0.308753 0.329396 0.175256 0.639065 1.000000 -0.044817 0.219865 0.118975 0.162547 -0.029494 0.005918 0.066947 0.006722 0.074372 0.040710
Detka 0.083490 0.173942 0.293573 0.022397 -0.031094 -0.061103 -0.044817 1.000000 0.085771 0.209457 0.035738 0.245054 -0.027279 -0.009111 -0.031285 -0.009741 -0.005332
XPEZNAZ 0.336118 0.482206 0.225351 0.241395 0.243413 0.168574 0.219865 0.085771 1.000000 0.361037 0.096790 -0.025176 -0.043875 0.088032 0.133161 -0.017899 0.053324
NightSeeker 0.325899 0.299051 0.206389 0.172527 0.063362 0.159559 0.118975 0.209457 0.361037 1.000000 0.082156 0.223783 -0.029041 -0.009700 0.030238 -0.010370 -0.005677
Groovy 0.163929 0.256762 0.179688 0.036961 0.156747 0.083888 0.162547 0.035738 0.096790 0.082156 1.000000 0.100472 -0.020234 0.106160 0.053477 -0.007225 -0.003955
Rozi 0.136142 0.071171 0.087392 -0.019066 -0.017935 -0.028465 -0.029494 0.245054 -0.025176 0.223783 0.100472 1.000000 -0.006868 -0.002294 -0.007877 -0.002453 -0.001343
xWarLord_Sharon -0.008702 0.022812 -0.031332 -0.015748 0.378900 0.085319 0.005918 -0.027279 -0.043875 -0.029041 -0.020234 -0.006868 1.000000 0.005106 -0.015682 -0.004883 -0.002673
פליציק 0.087041 0.061495 0.055703 -0.017900 -0.007931 0.071676 0.066947 -0.009111 0.088032 -0.009700 0.106160 -0.002294 0.005106 1.000000 -0.005238 -0.001631 -0.000893
OMGitsyuvalT 0.132129 0.167033 0.026169 0.148718 0.086769 0.150203 0.006722 -0.031285 0.133161 0.030238 0.053477 -0.007877 -0.015682 -0.005238 1.000000 -0.005600 0.127063
Katya -0.017479 -0.025481 0.005760 -0.013625 -0.012751 0.071664 0.074372 -0.009741 -0.017899 -0.010370 -0.007225 -0.002453 -0.004883 -0.001631 -0.005600 1.000000 -0.000954
dantzi 0.054608 0.037459 -0.015362 -0.010476 -0.006980 0.041948 0.040710 -0.005332 0.053324 -0.005677 -0.003955 -0.001343 -0.002673 -0.000893 0.127063 -0.000954 1.000000
In [8]:
plt.figure(figsize=(20, 20))
sns.heatmap(corr, cmap='YlGnBu')
plt.xticks(rotation=0)
plt.show()
2021-05-16T21:38:19.935555 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [9]:
plt.figure(figsize=(20, 20))
sns.heatmap(corr[(abs(corr) > 0.3)], cmap='YlGnBu')
plt.xticks(rotation=0)
plt.show()
2021-05-16T21:38:20.755363 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [10]:
pd.DataFrame(corr.unstack().sort_values().drop_duplicates())
Out[10]:
0
יניבצ'וק Detka -0.061103
Detka October -0.044817
XPEZNAZ xWarLord_Sharon -0.043875
MrStormagedon xWarLord_Sharon -0.031332
OMGitsyuvalT Detka -0.031285
... ... ...
סדאם חוסיין ז"ל MrStormagedon 0.400392
XPEZNAZ סדאם חוסיין ז"ל 0.482206
סדאם חוסיין ז"ל stratos1805 0.573854
יניבצ'וק October 0.639065
stratos1805 stratos1805 1.000000

137 rows × 1 columns